Data summary
# Print a compact column-by-column overview (name, type, first values).
glimpse(Laliga_2021)
## Rows: 373
## Columns: 18
## $ league <chr> "La_liga", "La_liga", "La_liga", "La_liga", "La_liga", "…
## $ season <chr> "2021/2022", "2021/2022", "2021/2022", "2021/2022", "202…
## $ match_id <chr> "17136", "17138", "17139", "17137", "17140", "17141", "1…
## $ isResult <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
## $ home_id <chr> "146", "239", "261", "157", "158", "152", "148", "138", …
## $ home_team <chr> "Valencia", "Mallorca", "Cadiz", "Osasuna", "Alaves", "C…
## $ home_abbr <chr> "VAL", "MAL", "CAD", "OSA", "ALA", "CEL", "BAR", "SEV", …
## $ away_id <chr> "142", "153", "151", "141", "150", "143", "140", "145", …
## $ away_team <chr> "Getafe", "Real Betis", "Levante", "Espanyol", "Real Mad…
## $ away_abbr <chr> "GET", "BET", "LEV", "ESP", "RMA", "ATL", "SOC", "RVL", …
## $ home_goals <dbl> 1, 1, 1, 0, 1, 1, 4, 3, 0, 0, 1, 0, 1, 0, 1, 1, 1, 3, 0,…
## $ away_goals <dbl> 0, 1, 1, 0, 4, 2, 2, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 3, 1,…
## $ home_xG <dbl> 1.578610, 0.569578, 0.993589, 0.579404, 1.410970, 1.5939…
## $ away_xG <dbl> 1.193260, 0.814085, 0.915954, 0.583698, 2.155510, 1.8050…
## $ datetime <chr> "2021-08-13 19:00:00", "2021-08-14 17:30:00", "2021-08-1…
## $ forecast_win <dbl> 0.4959, 0.2253, 0.3656, 0.2966, 0.1984, 0.2949, 0.9194, …
## $ forecast_draw <dbl> 0.2600, 0.3826, 0.3362, 0.4217, 0.2402, 0.2955, 0.0596, …
## $ forecast_loss <dbl> 0.2441, 0.3921, 0.2982, 0.2817, 0.5614, 0.4096, 0.0210, …
# Open the first rows of the match table in the data viewer.
view(head(Laliga_2021))
# Wide -> long: one row per (match, team) instead of one row per match.
# `home_away` records which column the team came from ("home_team" or
# "away_team"); `team` holds the team name.
Laliga_2021 <- Laliga_2021 |>
  pivot_longer(
    cols = c(home_team, away_team),
    names_to = "home_away",
    values_to = "team"
  ) |>
  mutate(
    # Match outcome, encoded with the same labels as `home_away` so the two
    # columns can be compared directly. Missing goals give a missing outcome.
    win = case_when(
      is.na(home_goals) | is.na(away_goals) ~ NA_character_,
      home_goals > away_goals ~ "home_team",
      home_goals == away_goals ~ "draw",
      TRUE ~ "away_team"
    ),
    # Points for this row's team: 3 for a win, 1 for a draw, 0 for a loss.
    point = case_when(
      is.na(win) ~ NA_real_,
      home_away == win ~ 3,
      win == "draw" ~ 1,
      TRUE ~ 0
    )
  )
Ranking
# League table: total points per team, highest first.
Ranking <- Laliga_2021 |>
  group_by(team) |>
  summarise(points = sum(point)) |>
  arrange(desc(points))

# Interactive HTML table of the standings.
DT::datatable(Ranking)

# Names of the ten teams with the most points, in ranking order.
top_10 <- head(Ranking$team, 10)
# Cumulative points over the season for the top-10 teams, one line per team.
g <- Laliga_2021 |>
  select(team, datetime, point) |>
  # `datetime` is stored as character; parse it so the x axis is a continuous
  # time scale rather than one discrete level per kickoff.
  # NOTE(review): tz = "UTC" is an assumption about the source data -- confirm.
  mutate(
    datetime = as.POSIXct(datetime, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
  ) |>
  # cumsum() depends on row order, so sort chronologically first.
  arrange(datetime) |>
  group_by(team) |>
  mutate(points = cumsum(point)) |>
  ungroup() |>
  filter(team %in% top_10) |>
  # group = team draws a separate line for each team; a constant group would
  # join every team's observations into a single zig-zag path.
  ggplot(aes(datetime, points, group = team, color = team)) +
  geom_line()

# Interactive version of the plot.
plotly::ggplotly(g)
Home/Away
# xG vs xGA: per-team average expected goals for and against.
Laliga_2021 |>
  mutate(
    # From the row's team's point of view: its own xG is the home value when
    # it played at home, otherwise the away value; xGA is the opposite side.
    xG = if_else(home_away == "home_team", home_xG, away_xG),
    xGA = if_else(home_away == "home_team", away_xG, home_xG)
  ) |>
  group_by(team) |>
  summarise(
    xG = mean(xG),
    xGA = mean(xGA)
  ) |>
  ggplot(aes(xG, xGA, color = team, label = team)) +
  geom_point() +
  # max.overlaps = Inf keeps every team labelled; with the default limit
  # ggrepel silently drops labels in crowded regions.
  ggrepel::geom_label_repel(max.overlaps = Inf) +
  xlim(c(0, 2.5)) +
  ylim(c(0, 2.5))
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Per-team average xG against average points per match.
Laliga_2021 |>
  mutate(
    # The row's team's own xG: home value when it played at home, else away.
    xG = if_else(home_away == "home_team", home_xG, away_xG)
  ) |>
  group_by(team) |>
  summarise(
    mean_point = mean(point),
    xG = mean(xG)
  ) |>
  ggplot(aes(xG, mean_point, color = team, label = team)) +
  geom_point() +
  # Reference line y = x (slope 1, default intercept 0).
  geom_abline(slope = 1) +
  # max.overlaps = Inf keeps every team labelled; with the default limit
  # ggrepel silently drops labels in crowded regions.
  ggrepel::geom_label_repel(max.overlaps = Inf) +
  xlim(c(0, 2.5)) +
  ylim(c(0, 2.5))
## Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
